{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Neeps- Medium"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "········\n"
     ]
    }
   ],
   "source": [
    "import getpass\n",
    "import psycopg2\n",
    "from sqlalchemy import create_engine\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "pwd = getpass.getpass()\n",
    "engine = create_engine(\n",
    "    'postgresql+psycopg2://postgres:%s@192.168.31.31:15432/sqlzoo' % (pwd))\n",
    "pd.set_option('display.max_rows', 60)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "ut_staff = pd.read_sql_table('ut_staff', engine)\n",
    "ut_student = pd.read_sql_table('ut_student', engine)\n",
    "ut_event = pd.read_sql_table('ut_event', engine)\n",
    "ut_room = pd.read_sql_table('ut_room', engine)\n",
    "ut_attends = pd.read_sql_table('ut_attends', engine)\n",
    "ut_teaches = pd.read_sql_table('ut_teaches', engine)\n",
    "ut_occurs = pd.read_sql_table('ut_occurs', engine)\n",
    "ut_modle = pd.read_sql_table('ut_modle', engine)\n",
    "ut_week = pd.read_sql_table('ut_week', engine)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6.\n",
    "**Show the 'size' of each of the co72010 events. Size is the total number of students attending each event.**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>event_id</th>\n",
       "      <th>sze</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>co72013.L01</td>\n",
       "      <td>215</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>co72013.T02</td>\n",
       "      <td>49</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>co72013.T04</td>\n",
       "      <td>45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>co72013.T01</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>co72013.T05</td>\n",
       "      <td>35</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>co72013.T06</td>\n",
       "      <td>29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>co72013.L02</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>co72013.T03</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      event_id  sze\n",
       "0  co72013.L01  215\n",
       "3  co72013.T02   49\n",
       "5  co72013.T04   45\n",
       "2  co72013.T01   40\n",
       "6  co72013.T05   35\n",
       "7  co72013.T06   29\n",
       "1  co72013.L02   10\n",
       "4  co72013.T03   10"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = (ut_event.rename(columns={'id': 'event_id'})\n",
    "     .loc[ut_event['modle']=='co72010']\n",
    "     .merge(ut_attends, left_on='event_id', right_on='event')\n",
    "     .merge(ut_student, left_on='student', right_on='id'))\n",
    "(a.groupby('event_id')['sze'].sum().reset_index()\n",
    " .sort_values('sze', ascending=False))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7.\n",
    "**For each post-graduate module, show the size of the teaching team. (post graduate modules start with the code co7).**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>staff</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>co72002</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>co72003</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>co72004</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>co72005</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>co72006</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>co72010</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>co72011</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>co72012</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>co72016</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>co72017</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>co72018</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>co72021</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>co72026</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>co72033</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         id  staff\n",
       "0   co72002      1\n",
       "1   co72003      2\n",
       "2   co72004      1\n",
       "3   co72005      2\n",
       "4   co72006      1\n",
       "5   co72010      2\n",
       "6   co72011      2\n",
       "7   co72012      1\n",
       "8   co72016      2\n",
       "9   co72017      1\n",
       "10  co72018      1\n",
       "11  co72021      1\n",
       "12  co72026      2\n",
       "13  co72033      1"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "t = (ut_teaches.merge(ut_event.rename(columns={'id': 'evt_id'}), \n",
    "                      left_on='event', right_on='evt_id')\n",
    "     .merge(ut_modle.loc[ut_modle['id'].str.contains('^co7')], \n",
    "            left_on='modle', right_on='id')\n",
    "    [['id', 'staff']].drop_duplicates())\n",
    "t.groupby('id')['staff'].count().reset_index()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8.\n",
    "**Give the full name of those modules which include events taught for fewer than 10 weeks.**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>modle_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>127</th>\n",
       "      <td>Interactivity and the Internet</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149</th>\n",
       "      <td>Internet Multimedia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>102</th>\n",
       "      <td>Languages and Algorithms</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>109</th>\n",
       "      <td>Project</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                         modle_name\n",
       "127  Interactivity and the Internet\n",
       "149             Internet Multimedia\n",
       "102        Languages and Algorithms\n",
       "109                         Project"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = (ut_modle.rename(columns={'name': 'modle_name'})\n",
    "     .merge(ut_event.rename(columns={'id': 'evt_id'}), \n",
    "            left_on='id', right_on='modle')\n",
    "     .merge(ut_occurs, left_on='evt_id', right_on='event'))\n",
    "b = (a.groupby(['evt_id', 'modle_name'])['event'].count()\n",
    "     .reset_index().sort_values('modle_name'))\n",
    "b.loc[b['event']<10, ['modle_name']].drop_duplicates()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 9.\n",
    "**Identify those events which start at the same time as one of the co72010 lectures.**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id_x</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>co12004.T04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>62</th>\n",
       "      <td>co12004.T05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162</th>\n",
       "      <td>co12005.T01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>192</th>\n",
       "      <td>co12005.T04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>co12006.L03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>133</th>\n",
       "      <td>co12008.L01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>co12012.T01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>co22005.T02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>280</th>\n",
       "      <td>co22005.T04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>217</th>\n",
       "      <td>co22005.T07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>179</th>\n",
       "      <td>co22005.T08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>324</th>\n",
       "      <td>co22006.L02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>91</th>\n",
       "      <td>co22008.T03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>354</th>\n",
       "      <td>co22008.T04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>145</th>\n",
       "      <td>co32011.T03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>71</th>\n",
       "      <td>co32014.T01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>340</th>\n",
       "      <td>co32016.L01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>291</th>\n",
       "      <td>co32018.L01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>65</th>\n",
       "      <td>co32021.L01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>194</th>\n",
       "      <td>co42001.L01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>365</th>\n",
       "      <td>co42010.T01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>331</th>\n",
       "      <td>co42015.T01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>122</th>\n",
       "      <td>co72006.L01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>292</th>\n",
       "      <td>co72016.T01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>314</th>\n",
       "      <td>co72018.T01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>130</th>\n",
       "      <td>coh2451.T01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96</th>\n",
       "      <td>coh8412555.L01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>243</th>\n",
       "      <td>coh8412555.T02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>294</th>\n",
       "      <td>coh8412585.T03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>211</th>\n",
       "      <td>coh8412605.L01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>374</th>\n",
       "      <td>coh8412605.T01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>186</th>\n",
       "      <td>coh8412615.T03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>393</th>\n",
       "      <td>coh8412615.T05</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               id_x\n",
       "0       co12004.T04\n",
       "62      co12004.T05\n",
       "162     co12005.T01\n",
       "192     co12005.T04\n",
       "45      co12006.L03\n",
       "133     co12008.L01\n",
       "6       co12012.T01\n",
       "31      co22005.T02\n",
       "280     co22005.T04\n",
       "217     co22005.T07\n",
       "179     co22005.T08\n",
       "324     co22006.L02\n",
       "91      co22008.T03\n",
       "354     co22008.T04\n",
       "145     co32011.T03\n",
       "71      co32014.T01\n",
       "340     co32016.L01\n",
       "291     co32018.L01\n",
       "65      co32021.L01\n",
       "194     co42001.L01\n",
       "365     co42010.T01\n",
       "331     co42015.T01\n",
       "122     co72006.L01\n",
       "292     co72016.T01\n",
       "314     co72018.T01\n",
       "130     coh2451.T01\n",
       "96   coh8412555.L01\n",
       "243  coh8412555.T02\n",
       "294  coh8412585.T03\n",
       "211  coh8412605.L01\n",
       "374  coh8412605.T01\n",
       "186  coh8412615.T03\n",
       "393  coh8412615.T05"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "t = ut_event.merge(ut_occurs, left_on='id', right_on='event')\n",
    "t['time'] = t[['week', 'dow', 'tod']].agg('-'.join, axis=1)\n",
    "(t.loc[t['modle'] != 'co72010'].merge(t.loc[t['modle'] == 'co72010'], on='time')[['id_x']]\n",
    " .sort_values('id_x').drop_duplicates())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 10.\n",
    "**How many members of staff have contact time which is greater than the average?**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   count\n",
       "0     19"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "t = (ut_event.merge(ut_teaches, left_on='id', right_on='event')\n",
    "     .merge(ut_occurs, left_on='id', right_on='event')\n",
    "     .groupby('staff')['duration'].sum().reset_index())\n",
    "pd.DataFrame({'count': [\n",
    "    t.loc[t['duration'] > sum(t['duration'])/ut_teaches['staff'].nunique()].shape[0]]})\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
